/* This code is part of the tng compression routines.
 *
 * Written by Daniel Spangberg and Magnus Lundborg
 * Copyright (c) 2010, 2013-2014 The GROMACS development team.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the Revised BSD License.
 */

/* This code is heavily influenced by
   http://hpcv100.rc.rug.nl/xdrf.html
   Based on coordinate compression (c) by Frans van Hoesel.
   and GROMACS xtc files (http://www.gromacs.org)
   (c) Copyright (c) Erik Lindahl, David van der Spoel
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "../../include/compression/coder.h"
#include "../../include/compression/widemuldiv.h"
#include "../../include/compression/warnmalloc.h"

/* Generated by gen_magic.py */
#define MAX_MAGIC  92

#ifdef USE_WINDOWS
#define TNG_INLINE __inline
#else
#define TNG_INLINE inline
#endif

static unsigned int magic[MAX_MAGIC]={
2U,  3U,  4U,  5U,
6U,  8U,  10U,  12U,
16U,  20U,  25U,  32U,
40U,  50U,  64U,  80U,
101U,  128U,  161U,  203U,
256U,  322U,  406U,  512U,
645U,  812U,  1024U,  1290U,
1625U,  2048U,  2580U,  3250U,
4096U,  5160U,  6501U,  8192U,
10321U,  13003U,  16384U,  20642U,
26007U,  32768U,  41285U,  52015U,
65536U,  82570U,  104031U,  131072U,
165140U,  208063U,  262144U,  330280U,
416127U,  524288U,  660561U,  832255U,
1048576U,  1321122U,  1664510U,  2097152U,
2642245U,  3329021U,  4194304U,  5284491U,
6658042U,  8388608U,  10568983U,  13316085U,
16777216U,  21137967U,  26632170U,  33554432U,
42275935U,  53264340U,  67108864U,  84551870U,
106528681U,  134217728U,  169103740U,  213057362U,
268435456U,  338207481U,  426114725U,  536870912U,
676414963U,  852229450U,  1073741824U,  1352829926U,
1704458900U,  2147483648U,  2705659852U,  3408917801U,
};

static unsigned int magic_bits[MAX_MAGIC][8]={
{ 3,  6,  9,  12,  15,  18,  21,  24,  },
{ 5,  10,  15,  20,  24,  29,  34,  39,  },
{ 6,  12,  18,  24,  30,  36,  42,  48,  },
{ 7,  14,  21,  28,  35,  42,  49,  56,  },
{ 8,  16,  24,  32,  39,  47,  55,  63,  },
{ 9,  18,  27,  36,  45,  54,  63,  72,  },
{ 10,  20,  30,  40,  50,  60,  70,  80,  },
{ 11,  22,  33,  44,  54,  65,  76,  87,  },
{ 12,  24,  36,  48,  60,  72,  84,  97,  },
{ 13,  26,  39,  52,  65,  78,  91,  104,  },
{ 14,  28,  42,  56,  70,  84,  98,  112,  },
{ 15,  30,  45,  60,  75,  90,  105,  120,  },
{ 16,  32,  48,  64,  80,  96,  112,  128,  },
{ 17,  34,  51,  68,  85,  102,  119,  136,  },
{ 18,  36,  54,  72,  90,  108,  127,  144,  },
{ 19,  38,  57,  76,  95,  114,  133,  152,  },
{ 20,  40,  60,  80,  100,  120,  140,  160,  },
{ 21,  42,  63,  84,  105,  127,  147,  168,  },
{ 22,  44,  66,  88,  110,  132,  154,  176,  },
{ 23,  46,  69,  92,  115,  138,  161,  184,  },
{ 24,  48,  72,  97,  120,  144,  168,  192,  },
{ 25,  50,  75,  100,  125,  150,  175,  200,  },
{ 26,  52,  78,  104,  130,  156,  182,  208,  },
{ 27,  54,  81,  108,  135,  162,  190,  216,  },
{ 28,  56,  84,  112,  140,  168,  196,  224,  },
{ 29,  58,  87,  116,  145,  174,  203,  232,  },
{ 30,  60,  90,  120,  150,  180,  210,  240,  },
{ 31,  62,  93,  124,  155,  186,  217,  248,  },
{ 32,  64,  96,  128,  160,  192,  224,  256,  },
{ 33,  66,  99,  132,  165,  198,  231,  264,  },
{ 34,  68,  102,  136,  170,  204,  238,  272,  },
{ 35,  70,  105,  140,  175,  210,  245,  280,  },
{ 36,  72,  108,  144,  180,  216,  252,  288,  },
{ 37,  74,  111,  148,  185,  222,  259,  296,  },
{ 38,  76,  114,  152,  190,  228,  266,  304,  },
{ 39,  78,  117,  157,  195,  234,  273,  312,  },
{ 40,  80,  120,  160,  200,  240,  280,  320,  },
{ 41,  82,  123,  164,  205,  246,  287,  328,  },
{ 42,  84,  127,  168,  210,  252,  294,  336,  },
{ 43,  86,  129,  172,  215,  258,  301,  344,  },
{ 44,  88,  132,  176,  220,  264,  308,  352,  },
{ 45,  90,  135,  180,  225,  270,  315,  360,  },
{ 46,  92,  138,  184,  230,  276,  322,  368,  },
{ 47,  94,  141,  188,  235,  282,  329,  376,  },
{ 48,  97,  144,  192,  240,  288,  336,  384,  },
{ 49,  98,  147,  196,  245,  294,  343,  392,  },
{ 50,  100,  150,  200,  250,  300,  350,  400,  },
{ 52,  102,  153,  204,  255,  306,  357,  408,  },
{ 52,  104,  156,  208,  260,  312,  364,  416,  },
{ 53,  106,  159,  212,  265,  318,  371,  424,  },
{ 54,  108,  162,  216,  270,  324,  378,  432,  },
{ 55,  110,  165,  220,  275,  330,  385,  440,  },
{ 56,  112,  168,  224,  280,  336,  392,  448,  },
{ 57,  114,  172,  228,  285,  342,  399,  456,  },
{ 58,  116,  174,  232,  290,  348,  406,  464,  },
{ 59,  118,  177,  236,  295,  354,  413,  472,  },
{ 60,  120,  180,  240,  300,  360,  420,  480,  },
{ 61,  122,  183,  244,  305,  366,  427,  488,  },
{ 62,  124,  186,  248,  310,  372,  434,  496,  },
{ 63,  127,  190,  252,  315,  378,  442,  505,  },
{ 64,  128,  192,  256,  320,  384,  448,  512,  },
{ 65,  130,  195,  260,  325,  390,  455,  520,  },
{ 66,  132,  198,  264,  330,  396,  462,  528,  },
{ 67,  134,  201,  268,  335,  402,  469,  536,  },
{ 68,  136,  204,  272,  340,  408,  476,  544,  },
{ 69,  138,  207,  276,  345,  414,  483,  552,  },
{ 70,  140,  210,  280,  350,  420,  490,  560,  },
{ 71,  142,  213,  284,  355,  426,  497,  568,  },
{ 72,  144,  216,  288,  360,  432,  505,  576,  },
{ 73,  146,  219,  292,  365,  438,  511,  584,  },
{ 74,  148,  222,  296,  370,  444,  518,  592,  },
{ 75,  150,  225,  300,  375,  451,  525,  600,  },
{ 76,  152,  228,  304,  380,  456,  532,  608,  },
{ 77,  154,  231,  308,  385,  462,  539,  616,  },
{ 78,  157,  234,  312,  390,  469,  546,  625,  },
{ 79,  158,  237,  316,  395,  474,  553,  632,  },
{ 80,  160,  240,  320,  400,  480,  560,  640,  },
{ 81,  162,  243,  324,  406,  486,  568,  648,  },
{ 82,  164,  246,  328,  410,  492,  574,  656,  },
{ 83,  166,  249,  332,  415,  498,  581,  664,  },
{ 84,  168,  252,  336,  420,  505,  588,  672,  },
{ 85,  170,  255,  340,  425,  510,  595,  680,  },
{ 86,  172,  258,  344,  430,  516,  602,  688,  },
{ 87,  174,  261,  348,  435,  522,  609,  696,  },
{ 88,  176,  264,  352,  440,  528,  616,  704,  },
{ 89,  178,  267,  356,  445,  534,  623,  712,  },
{ 90,  180,  270,  360,  451,  540,  631,  720,  },
{ 91,  182,  273,  364,  455,  546,  637,  728,  },
{ 92,  184,  276,  368,  460,  552,  644,  736,  },
{ 94,  187,  279,  373,  466,  558,  651,  745,  },
{ 94,  188,  282,  376,  470,  564,  658,  752,  },
{ 95,  190,  285,  380,  475,  570,  665,  760,  },
};


static const double iflipgaincheck=0.89089871814033927; /*  1./(2**(1./6)) */


/* Difference in indices used for determining whether to store as large or small */
#define QUITE_LARGE 3
#define IS_LARGE 6

#if 0
#define SHOWIT
#endif

#ifdef USE_WINDOWS
#define TNG_INLINE __inline
#else
#define TNG_INLINE inline
#endif

int Ptngc_magic(const unsigned int i)
{
  return magic[i];
}

int Ptngc_find_magic_index(const unsigned int maxval)
{
  int i;

  if(maxval > magic[MAX_MAGIC/4])
  {
      if(maxval > magic[MAX_MAGIC/2])
      {
          i = MAX_MAGIC/2 + 1;
      }
      else
      {
          i = MAX_MAGIC/4 + 1;
      }
  }
  else
  {
      i = 0;
  }

  while (magic[i]<=maxval)
    i++;
  return i;
}

static TNG_INLINE unsigned int positive_int(const int item)
{
  int s=0;
  if (item>0)
    s=1+(item-1)*2;
  else if (item<0)
    s=2+(-item-1)*2;
  return s;
}

static TNG_INLINE int unpositive_int(const int val)
{
  int s=(val+1)/2;
  if ((val%2)==0)
    s=-s;
  return s;
}


/* Sequence instructions */
#define INSTR_DEFAULT 0
#define INSTR_BASE_RUNLENGTH 1
#define INSTR_ONLY_LARGE 2
#define INSTR_ONLY_SMALL 3
#define INSTR_LARGE_BASE_CHANGE 4
#define INSTR_FLIP 5
#define INSTR_LARGE_RLE 6

#define MAXINSTR 7

#ifdef SHOWIT
static char *instrnames[MAXINSTR]={
  "large+small",
  "base+run",
  "large",
  "small",
  "large base change",
  "flip",
  "large rle",
};
#endif

/* Bit patterns in the compressed code stream: */

static const int seq_instr[MAXINSTR][2]=
  {
    { 1,1 }, /* 1 : one large atom + runlength encoded small integers. Use same settings as before. */
    { 0,2 }, /* 00 : set base and runlength in next four bits (x). base (increase/keep/decrease)=x%3-1. runlength=1+x/3.
                      The special value 1111 in the four bits means runlength=6 and base change=0 */
    { 4,4 }, /* 0100 : next only a large atom comes. */
    { 5,4 }, /* 0101 : next only runlength encoded small integers. Use same settings as before. */
    { 6,4 }, /* 0110 : Large change in base. Change is encoded in the
                following 2 bits. change direction (sign) is the first
                bit. The next bit +1 is the actual change. This
                allows the change of up to +/- 2 indices. */
    { 14,5 }, /* 01110 : flip whether integers should be modified to compress water better */
    { 15,5 }, /* 01111 : Large rle. The next 4 bits encode how many
               large atoms are in the following sequence: 3-18. (2 is
               more efficiently coded with two large instructions. */
 };

static void write_instruction(struct coder *coder, const int instr, unsigned char **output_ptr)
{
  Ptngc_writebits(coder,seq_instr[instr][0],seq_instr[instr][1],output_ptr);
#ifdef SHOWIT
  fprintf(stderr,"INSTR: %s (%d bits)\n",instrnames[instr],seq_instr[instr][1]);
#endif
}

static unsigned int readbits(unsigned char **ptr, int *bitptr, int nbits)
{
  unsigned int val=0U;
  unsigned int extract_mask=0x80U>>*bitptr;
  unsigned char thisval=**ptr;
#ifdef SHOWIT
  fprintf(stderr,"Read nbits=%d val=",nbits);
#endif
  while (nbits--)
    {
      val<<=1;
      val|=((extract_mask & thisval)!=0);
      *bitptr=(*bitptr)+1;
      extract_mask>>=1;
      if (!extract_mask)
        {
          extract_mask=0x80U;
          *ptr=(*ptr)+1;
          *bitptr=0;
          if (nbits)
            thisval=**ptr;
        }
    }
#ifdef SHOWIT
  fprintf(stderr,"%x\n",val);
#endif
  return val;
}

static void readmanybits(unsigned char **ptr, int *bitptr, int nbits, unsigned char *buffer)
{
  while (nbits>=8)
    {
      *buffer++=readbits(ptr,bitptr,8);
      nbits-=8;
#ifdef SHOWIT
      fprintf(stderr,"Read value %02x\n",buffer[-1]);
#endif
    }
  if (nbits)
    {
      *buffer++=readbits(ptr,bitptr,nbits);
#ifdef SHOWIT
      fprintf(stderr,"Read value %02x\n",buffer[-1]);
#endif
    }
}

static int read_instruction(unsigned char **ptr, int *bitptr)
{
  int instr=-1;
  unsigned int bits=readbits(ptr,bitptr,1);
  if (bits)
    instr=INSTR_DEFAULT;
  else
    {
      bits=readbits(ptr,bitptr,1);
      if (!bits)
        instr=INSTR_BASE_RUNLENGTH;
      else
        {
          bits=readbits(ptr,bitptr,2);
          if (bits==0)
            instr=INSTR_ONLY_LARGE;
          else if (bits==1)
            instr=INSTR_ONLY_SMALL;
          else if (bits==2)
            instr=INSTR_LARGE_BASE_CHANGE;
          else if (bits==3)
            {
              bits=readbits(ptr,bitptr,1);
              if (bits==0)
                instr=INSTR_FLIP;
              else
                instr=INSTR_LARGE_RLE;
            }
        }
    }
  return instr;
}

/* Modifies three integer values for better compression of water */
static void swap_ints(int *in, int *out)
{
  out[0]=in[0]+in[1];
  out[1]=-in[1];
  out[2]=in[1]+in[2];
}

static void swap_is_better(int *input, int *minint, int *sum_normal, int *sum_swapped)
{
  int normal_max=0;
  int swapped_max=0;
  int i,j;
  int normal[3];
  int swapped[3];
  for (i=0; i<3; i++)
    {
      normal[0]=input[i]-minint[i];
      normal[1]=input[3+i]-input[i]; /* minint[i]-minint[i] cancels out */
      normal[2]=input[6+i]-input[3+i]; /* minint[i]-minint[i] cancels out */
      swap_ints(normal,swapped);
      for (j=1; j<3; j++)
        {
          if (positive_int(normal[j])>(unsigned int)normal_max)
            normal_max=positive_int(normal[j]);
          if (positive_int(swapped[j])>(unsigned int)swapped_max)
            swapped_max=positive_int(swapped[j]);
        }
    }
  if (normal_max==0)
    normal_max=1;
  if (swapped_max==0)
    swapped_max=1;
  *sum_normal=normal_max;
  *sum_swapped=swapped_max;
}

static void swapdecide(struct coder *coder, int *input, int *swapatoms, int *large_index, int *minint, unsigned char **output_ptr)
{
  int didswap=0;
  int normal,swapped;
  (void)large_index;
  swap_is_better(input,minint,&normal,&swapped);
  /* We have to determine if it is worth to change the behaviour.
     If diff is positive it means that it is worth something to
     swap. But it costs 4 bits to do the change. If we assume that
     we gain 0.17 bit by the swap per value, and the runlength>2
     for four molecules in a row, we gain something. So check if we
     gain at least 0.17 bits to even attempt the swap.
  */
#ifdef SHOWIT
  fprintf(stderr,"Trying Flip: %g %g\n",(double)swapped/normal, (double)normal/swapped);
#endif
  if (((swapped<normal) && (fabs((double)swapped/normal)<iflipgaincheck)) ||
      ((normal<swapped) && (fabs((double)normal/swapped)<iflipgaincheck)))
    {
      if (swapped<normal)
        {
          if (!*swapatoms)
            {
              *swapatoms=1;
              didswap=1;
            }
        }
      else
        {
          if (*swapatoms)
            {
              *swapatoms=0;
              didswap=1;
            }
        }
    }
  if (didswap)
    {
#ifdef SHOWIT
      fprintf(stderr,"Flip. Swapatoms is now %d\n",*swapatoms);
#endif
      write_instruction(coder,INSTR_FLIP,output_ptr);
    }
}

/* Compute number of bits required to store values using three different bases in the index array */
static int compute_magic_bits(int *index)
{
  unsigned int largeint[4];
  unsigned int largeint_tmp[4];
  int i,j,onebit;
  for (i=0; i<4; i++)
    largeint[i]=0U;
  for (i=0; i<3; i++)
    {
      if (i!=0)
        {
          /* We must do the multiplication of the largeint with the integer base */
          Ptngc_largeint_mul(magic[index[i]],largeint,largeint_tmp,4);
          for (j=0; j<4; j++)
            largeint[j]=largeint_tmp[j];
        }
      Ptngc_largeint_add(magic[index[i]]-1,largeint,4);
    }
  /* Find last bit. */
#if 0
  printf("Largeint is %u %u %u\n",largeint[0],largeint[1],largeint[2]);
#endif
  onebit=0;
  for (i=0; i<3; i++)
    for (j=0; j<32; j++)
      if (largeint[i]&(1U<<j))
        onebit=i*32+j+1;
  return onebit;
}

/* Convert a sequence of (hopefully) small positive integers
   using the base pointed to by index (x base, y base and z base can be different).
   The largest number of integers supported is 18 (29 to handle/detect overflow) */
static void trajcoder_base_compress(int *input, const int n, int *index, unsigned char *result)
{
  unsigned int largeint[19];
  unsigned int largeint_tmp[19];
  int i, j;

  memset(largeint, 0U, sizeof(unsigned int) * 19);

  if(n > 0)
    {
      Ptngc_largeint_add(input[0],largeint,19);
    }

  for (i=1; i<n; i++)
    {
      /* We must do the multiplication of the largeint with the integer base */
      Ptngc_largeint_mul(magic[index[i%3]],largeint,largeint_tmp,19);

      memcpy(largeint,largeint_tmp,19*sizeof *largeint);

      Ptngc_largeint_add(input[i],largeint,19);
    }
  if (largeint[18])
    {
      fprintf(stderr,"TRAJNG: BUG! Overflow in compression detected.\n");
      exit(EXIT_FAILURE);
    }
#if 0
#ifdef SHOWIT
  for (i=0; i<19; i++)
    fprintf(stderr,"Largeint[%d]=0x%x\n",i,largeint[i]);
#endif
#endif
  /* Convert the largeint to a sequence of bytes. */
  for (i=0; i<18; i++)
    {
      int shift=0;
      for (j=0; j<4; j++)
        {
          result[i*4+j]=(largeint[i]>>shift) & 0xFFU;
          shift+=8;
        }
    }
}

/* The opposite of base_compress. */
static void trajcoder_base_decompress(unsigned char *input, const int n, int *index, int *output)
{
  unsigned int largeint[19];
  unsigned int largeint_tmp[19];
  int i,j;
  /* Convert the sequence of bytes to a largeint. */
  for (i=0; i<18; i++)
    {
      int shift=0;
      largeint[i]=0U;
      for (j=0; j<4; j++)
        {
          largeint[i]|=((unsigned int)input[i*4+j])<<shift;
          shift+=8;
        }
    }
  largeint[18]=0U;
#if 0
#ifdef SHOWIT
  for (i=0; i<19; i++)
    fprintf(stderr,"Largeint[%d]=0x%x\n",i,largeint[i]);
#endif
#endif
  for (i=n-1; i>=0; i--)
    {
      unsigned int remainder=Ptngc_largeint_div(magic[index[i%3]],largeint,largeint_tmp,19);
#if 0
#ifdef SHOWIT
      fprintf(stderr,"Remainder: %u\n",remainder);
#endif
#endif
#if 0
      for (j=0; j<19; j++)
        largeint[j]=largeint_tmp[j];
#endif
      memcpy(largeint,largeint_tmp,19*sizeof *largeint);
      output[i]=remainder;
    }
}

/* It is "large" if we have to increase the small index quite a
   bit. Not so much to be rejected by the not very large check
   later. */
static int is_quite_large(int *input, const int small_index, const int max_large_index)
{
  int is=0;
  int i;
  if (small_index+QUITE_LARGE>=max_large_index)
    is=1;
  else
    {
      for (i=0; i<3; i++)
        if (positive_int(input[i])>magic[small_index+QUITE_LARGE])
          {
            is=1;
            break;
          }
    }
  return is;
}

#ifdef SHOWIT
int nbits_sum;
int nvalues_sum;
#endif

static void write_three_large(struct coder *coder, int *encode_ints, int *large_index, const int nbits, unsigned char *compress_buffer, unsigned char **output_ptr)
{
  trajcoder_base_compress(encode_ints,3,large_index,compress_buffer);
  Ptngc_writemanybits(coder,compress_buffer,nbits,output_ptr);
#ifdef SHOWIT
  fprintf(stderr,"nbits=%d (%g)\n",nbits,nbits/3.);
  nbits_sum+=nbits;
  nvalues_sum+=3;
  fprintf(stderr,"Large: %d %d %d\n",encode_ints[0],encode_ints[1],encode_ints[2]);
#endif
}

static void insert_batch(int *input_ptr, int ntriplets_left, const int *prevcoord,int *minint, int *encode_ints, const int startenc, int *nenc)
{
  int nencode=startenc*3;
  int tmp_prevcoord[3];

  tmp_prevcoord[0]=prevcoord[0];
  tmp_prevcoord[1]=prevcoord[1];
  tmp_prevcoord[2]=prevcoord[2];

  if (startenc)
    {
      int i;
      for (i=0; i<startenc; i++)
        {
          tmp_prevcoord[0]+=encode_ints[i*3];
          tmp_prevcoord[1]+=encode_ints[i*3+1];
          tmp_prevcoord[2]+=encode_ints[i*3+2];
#ifdef SHOWIT
          fprintf(stderr,"%6d: %6d %6d %6d\t\t%6d %6d %6d\t\t%6d %6d %6d\n",i*3,
                  tmp_prevcoord[0],tmp_prevcoord[1],tmp_prevcoord[2],
                  encode_ints[i*3],
                  encode_ints[i*3+1],
                  encode_ints[i*3+2],
                  positive_int(encode_ints[i*3]),
                  positive_int(encode_ints[i*3+1]),
                  positive_int(encode_ints[i*3+2]));
#endif
        }
    }

#ifdef SHOWIT
  fprintf(stderr,"New batch\n");
#endif
  while ((nencode<21) && (nencode<ntriplets_left*3))
    {
      encode_ints[nencode]=input_ptr[nencode]-minint[0]-tmp_prevcoord[0];
      encode_ints[nencode+1]=input_ptr[nencode+1]-minint[1]-tmp_prevcoord[1];
      encode_ints[nencode+2]=input_ptr[nencode+2]-minint[2]-tmp_prevcoord[2];
#ifdef SHOWIT
      fprintf(stderr,"%6d: %6d %6d %6d\t\t%6d %6d %6d\t\t%6d %6d %6d\n",nencode,
              input_ptr[nencode]-minint[0],
              input_ptr[nencode+1]-minint[1],
              input_ptr[nencode+2]-minint[2],
              encode_ints[nencode],
              encode_ints[nencode+1],
              encode_ints[nencode+2],
              positive_int(encode_ints[nencode]),
              positive_int(encode_ints[nencode+1]),
              positive_int(encode_ints[nencode+2]));
#endif
      tmp_prevcoord[0]=input_ptr[nencode]-minint[0];
      tmp_prevcoord[1]=input_ptr[nencode+1]-minint[1];
      tmp_prevcoord[2]=input_ptr[nencode+2]-minint[2];
      nencode+=3;
    }
  *nenc=nencode;
}

static void flush_large(struct coder *coder, int *has_large, int *has_large_ints, const int n,
                        int *large_index, const int large_nbits, unsigned char *compress_buffer,
                        unsigned char **output_ptr)
{
  int i;
  if (n<3)
    {
      for (i=0; i<n; i++)
        {
          write_instruction(coder,INSTR_ONLY_LARGE,output_ptr);
          write_three_large(coder,has_large_ints+i*3,large_index,large_nbits,compress_buffer,output_ptr);
        }
    }
  else
    {
#if 0
      printf("CODING RLE: %d\n",n);
#endif
      write_instruction(coder,INSTR_LARGE_RLE,output_ptr);
      Ptngc_writebits(coder,n-3,4,output_ptr); /* Number of large atoms in this sequence: 3 to 18 */
      for (i=0; i<n; i++)
        write_three_large(coder,has_large_ints+i*3,large_index,large_nbits,compress_buffer,output_ptr);
    }
  if (((*has_large)-n)!=0)
    {
      int j;
      for (i=0; i<(*has_large)-n; i++)
        for (j=0; j<3; j++)
          has_large_ints[i*3+j]=has_large_ints[(i+n)*3+j];
    }
  *has_large=(*has_large)-n; /* Number of remaining large atoms in buffer */
}

static void buffer_large(struct coder *coder, int *has_large, int *has_large_ints, int *new_large_ints,
                        int *large_index, const int large_nbits, unsigned char *compress_buffer,
                        unsigned char **output_ptr)
{
  /* If it is full we must write them all. */
  if (*has_large==18)
    flush_large(coder,has_large,has_large_ints, *has_large,
                large_index,large_nbits,compress_buffer,output_ptr); /* Flush them all. */
  has_large_ints[(*has_large)*3]=new_large_ints[0];
  has_large_ints[(*has_large)*3+1]=new_large_ints[1];
  has_large_ints[(*has_large)*3+2]=new_large_ints[2];
  *has_large=(*has_large)+1;
}


unsigned char *Ptngc_pack_array_xtc2(struct coder *coder,int *input, int *length)
{
  unsigned char *output=NULL;
  unsigned char *output_ptr=NULL;
  int i,ienc,j;
  int output_length=0;
  /* Pack triplets. */
  int ntriplets=*length/3;
  int intmax;
  int max_small;
  int large_index[3];
  int max_large_index;
  int large_nbits;
  int small_index;
  int small_idx[3];
  int minint[3],maxint[3];
  int has_large=0;
  int has_large_ints[54]; /* Large cache. Up to 18 large atoms. */
  int prevcoord[3];
  int runlength=0; /* Initial runlength. "Stupidly" set to zero for
                      simplicity and explicity */
  int swapatoms=0; /* Initial guess is that we should not swap the
                      first two atoms in each large+small
                      transition */
  int didswap; /* Whether swapping was actually done. */
  int *input_ptr=input;
  int encode_ints[21]; /* Up to 3 large + 18 small ints can be encoded at once */
  int nencode;
  unsigned char compress_buffer[18*4]; /* Holds compressed result for 3 large ints or up to 18 small ints. */
  int ntriplets_left=ntriplets;
  int refused=0;
#ifdef SHOWIT
  nbits_sum=0;
  nvalues_sum=0;
#endif
  /* Allocate enough memory for output */
  output=warnmalloc(8* *length*sizeof *output);
  output_ptr=output;

  maxint[0]=minint[0]=input[0];
  maxint[1]=minint[1]=input[1];
  maxint[2]=minint[2]=input[2];

  for (i=1; i<ntriplets; i++)
    {
      for (j=0; j<3; j++)
        {
          if (input[i*3+j]>maxint[j])
            maxint[j]=input[i*3+j];
          if (input[i*3+j]<minint[j])
            minint[j]=input[i*3+j];
        }
    }

  large_index[0]=Ptngc_find_magic_index(maxint[0]-minint[0]+1);
  large_index[1]=Ptngc_find_magic_index(maxint[1]-minint[1]+1);
  large_index[2]=Ptngc_find_magic_index(maxint[2]-minint[2]+1);
  large_nbits=compute_magic_bits(large_index);
  max_large_index=large_index[0];
  if (large_index[1]>max_large_index)
    max_large_index=large_index[1];
  if (large_index[2]>max_large_index)
    max_large_index=large_index[2];

#ifdef SHOWIT
  for (j=0; j<3; j++)
    fprintf(stderr,"minint[%d]=%d. maxint[%d]=%d large_index[%d]=%d value=%d\n",j,minint[j],j,maxint[j],
            j,large_index[j],magic[large_index[j]]);
  fprintf(stderr,"large_nbits=%d\n",large_nbits);
#endif


  /* Guess initial small index */
  small_index=max_large_index/2;

  /* Find the largest value that is not large. Not large is half index of
     large. */
  max_small=magic[small_index];
  intmax=0;
  for (i=0; i<*length; i++)
    {
      int item=input[i];
      int s=positive_int(item);
      if (s>intmax)
        if (s<max_small)
          intmax=s;
    }
  /* This value is not critical, since if I guess wrong, the code will
     just insert instructions to increase this value. */
  small_index=Ptngc_find_magic_index(intmax);
#ifdef SHOWIT
  fprintf(stderr,"initial_small intmax=%d. small_index=%d value=%d\n",intmax,small_index,magic[small_index]);
#endif

  /* Store min integers */
  coder->pack_temporary_bits=32;
  coder->pack_temporary=positive_int(minint[0]);
  Ptngc_out8bits(coder,&output_ptr);
  coder->pack_temporary_bits=32;
  coder->pack_temporary=positive_int(minint[1]);
  Ptngc_out8bits(coder,&output_ptr);
  coder->pack_temporary_bits=32;
  coder->pack_temporary=positive_int(minint[2]);
  Ptngc_out8bits(coder,&output_ptr);
  /* Store max indices */
  coder->pack_temporary_bits=8;
  coder->pack_temporary=large_index[0];
  Ptngc_out8bits(coder,&output_ptr);
  coder->pack_temporary_bits=8;
  coder->pack_temporary=large_index[1];
  Ptngc_out8bits(coder,&output_ptr);
  coder->pack_temporary_bits=8;
  coder->pack_temporary=large_index[2];
  Ptngc_out8bits(coder,&output_ptr);
  /* Store initial small index */
  coder->pack_temporary_bits=8;
  coder->pack_temporary=small_index;
  Ptngc_out8bits(coder,&output_ptr);

#if 0
#ifdef SHOWIT
  for (i=0; i<ntriplets_left; i++)
    fprintf(stderr,"VALUE:%d %6d %6d %6d\n",
            i,
            input_ptr[i*3],
            input_ptr[i*3+1],
            input_ptr[i*3+2]);
#endif
#endif

  /* Initial prevcoord is the minimum integers. */
  memcpy(prevcoord, minint, 3*sizeof *prevcoord);

  while (ntriplets_left)
    {
      if (ntriplets_left<0)
        {
          fprintf(stderr,"TRAJNG: BUG! ntriplets_left<0!\n");
          exit(EXIT_FAILURE);
        }
      /* If only less than three atoms left we just write them all as large integers. Here no swapping is done! */
      if (ntriplets_left<3)
        {
          for (ienc=0; ienc<ntriplets_left; ienc++)
            {
              int jenc;
              for (jenc=0; jenc<3; jenc++)
                encode_ints[jenc]=input_ptr[ienc*3+jenc]-minint[jenc];
              buffer_large(coder,&has_large,has_large_ints,encode_ints,large_index,large_nbits,compress_buffer,&output_ptr);
              input_ptr+=3;
              ntriplets_left--;
            }
          flush_large(coder,&has_large,has_large_ints,has_large,large_index,large_nbits,compress_buffer,&output_ptr);
        }
      else
        {
          int min_runlength=0;
          int largest_required_base;
          int largest_runlength_base;
          int largest_required_index;
          int largest_runlength_index;
          int new_runlength;
          int new_small_index;
          int iter_runlength;
          int iter_small_index;
          int rle_index_dep;
          didswap=0;
          /* Insert the next batch of integers to be encoded into the buffer */
#ifdef SHOWIT
          fprintf(stderr,"Initial batch\n");
#endif
          insert_batch(input_ptr,ntriplets_left,prevcoord,minint,encode_ints,0,&nencode);

          /* First we must decide if the next value is large (does not reasonably fit in current small encoding)
             Also, if we have not written any values yet, we must begin by writing a large atom. */
          if ((input_ptr==input) || (is_quite_large(encode_ints,small_index,max_large_index)) || (refused))
            {
              /* If any of the next two atoms are large we should probably write them as large and not swap them */
              int no_swap=0;
              if ((is_quite_large(encode_ints+3,small_index,max_large_index)) || (is_quite_large(encode_ints+6,small_index,max_large_index)))
                no_swap=1;
              if (!no_swap)
                {
                  /* Next we must decide if we should swap the first
                     two values. */
#if 1
                  swapdecide(coder,input_ptr,&swapatoms,large_index,minint,&output_ptr);
#else
                  swapatoms=0;
#endif
                  /* If we should do the integer swapping manipulation we should do it now */
                  if (swapatoms)
                    {
                      didswap=1;
                      for (i=0; i<3; i++)
                        {
                          int in[3], out[3];
                          in[0]=input_ptr[i]-minint[i];
                          in[1]=input_ptr[3+i]-input_ptr[i]; /* minint[i]-minint[i] cancels out */
                          in[2]=input_ptr[6+i]-input_ptr[3+i]; /* minint[i]-minint[i] cancels out */
                          swap_ints(in,out);
                          encode_ints[i]=out[0];
                          encode_ints[3+i]=out[1];
                          encode_ints[6+i]=out[2];
                        }
                      /* We have swapped atoms, so the minimum run-length is 2 */
#ifdef SHOWIT
                      fprintf(stderr,"Swap atoms results in:\n");
                      for (i=0; i<3; i++)
                        fprintf(stderr,"%d: %6d %6d %6d\t\t%6d %6d %6d\n",i*3,
                                encode_ints[i*3],
                                encode_ints[i*3+1],
                                encode_ints[i*3+2],
                                positive_int(encode_ints[i*3]),
                                positive_int(encode_ints[i*3+1]),
                                positive_int(encode_ints[i*3+2]));

#endif
                      min_runlength=2;
                    }
                }
              if ((swapatoms) && (didswap))
                {
                  for (ienc=0; ienc<3; ienc++)
                    prevcoord[ienc]=encode_ints[ienc];
                }
              else
                {
                  for (ienc=0; ienc<3; ienc++)
                    prevcoord[ienc]=input_ptr[ienc]-minint[ienc];
                }
              /* Cache large value for later possible combination with
                 a sequence of small integers. */
              buffer_large(coder,&has_large,has_large_ints,prevcoord,large_index,large_nbits,compress_buffer,&output_ptr);
#ifdef SHOWIT
              fprintf(stderr,"Prevcoord after packing of large: %d %d %d\n",
                      prevcoord[0],prevcoord[1],prevcoord[2]);
#endif

              /* We have written a large integer so we have one less atoms to worry about */
              input_ptr+=3;
              ntriplets_left--;

              refused=0;

              /* Insert the next batch of integers to be encoded into the buffer */
#ifdef SHOWIT
              fprintf(stderr,"Update batch due to large int.\n");
#endif
              if ((swapatoms) && (didswap))
                {
                  /* Keep swapped values. */
                  for (i=0; i<2; i++)
                    for (ienc=0; ienc<3; ienc++)
                      encode_ints[i*3+ienc]=encode_ints[(i+1)*3+ienc];
                }
              insert_batch(input_ptr,ntriplets_left,prevcoord,minint,encode_ints,min_runlength,&nencode);
            }
          /* Here we should only have differences for the atom coordinates. */
          /* Convert the ints to positive ints */
          for (ienc=0; ienc<nencode; ienc++)
            {
              int pint=positive_int(encode_ints[ienc]);
              encode_ints[ienc]=pint;
            }
          /* Now we must decide what base and runlength to do. If we have swapped atoms it will be at least 2.
             If even the next atom is large, we will not do anything. */
          largest_required_base=0;
          /* Determine required base */
          for (ienc=0; ienc<min_runlength*3; ienc++)
            if (encode_ints[ienc]>largest_required_base)
              largest_required_base=encode_ints[ienc];
          /* Also compute what the largest base is for the current runlength setting! */
          largest_runlength_base=0;
          for (ienc=0; (ienc<runlength*3) && (ienc<nencode); ienc++)
            if (encode_ints[ienc]>largest_runlength_base)
              largest_runlength_base=encode_ints[ienc];

          largest_required_index=Ptngc_find_magic_index(largest_required_base);
          largest_runlength_index=Ptngc_find_magic_index(largest_runlength_base);

          if (largest_required_index<largest_runlength_index)
            {
              new_runlength=min_runlength;
              new_small_index=largest_required_index;
            }
          else
            {
              new_runlength=runlength;
              new_small_index=largest_runlength_index;
            }

          /* Only allow increase of runlength wrt min_runlength */
          if (new_runlength<min_runlength)
            new_runlength=min_runlength;

          /* If the current runlength is longer than the number of
             triplets left stop it from being so. */
          if (new_runlength>ntriplets_left)
            new_runlength=ntriplets_left;

          /* We must at least try to get some small integers going. */
          if (new_runlength==0)
            {
              new_runlength=1;
              new_small_index=small_index;
            }

          iter_runlength=new_runlength;
          iter_small_index=new_small_index;

          /* Iterate to find optimal encoding and runlength */
#ifdef SHOWIT
          fprintf(stderr,"Entering iterative loop.\n");
          fflush(stderr);
#endif

          do {
            new_runlength=iter_runlength;
            new_small_index=iter_small_index;

#ifdef SHOWIT
            fprintf(stderr,"Test new_small_index=%d Base=%d\n",new_small_index,magic[new_small_index]);
#endif
            /* What is the largest runlength
               we can do with the currently
               selected encoding? Also the max supported runlength is 6 triplets! */
            for (ienc=0; ienc<nencode && ienc<18; ienc++)
              {
                int test_index=Ptngc_find_magic_index(encode_ints[ienc]);
                if (test_index>new_small_index)
                  break;
              }
            if (ienc/3>new_runlength)
              {
                iter_runlength=ienc/3;
#ifdef SHOWIT
                fprintf(stderr,"I found a new possible runlength: %d\n",iter_runlength);
#endif
              }

            /* How large encoding do we have to use? */
            largest_runlength_base=0;
            for (ienc=0; ienc<iter_runlength*3; ienc++)
              if (encode_ints[ienc]>largest_runlength_base)
                largest_runlength_base=encode_ints[ienc];
            largest_runlength_index=Ptngc_find_magic_index(largest_runlength_base);
            if (largest_runlength_index!=new_small_index)
              {
                iter_small_index=largest_runlength_index;
#ifdef SHOWIT
                fprintf(stderr,"I found a new possible small index: %d Base=%d\n",iter_small_index,magic[iter_small_index]);
#endif
              }
          } while ((new_runlength!=iter_runlength) ||
                   (new_small_index!=iter_small_index));

#ifdef SHOWIT
          fprintf(stderr,"Exit iterative loop.\n");
          fflush(stderr);
#endif

          /* Verify that we got something good. We may have caught a
             substantially larger atom. If so we should just bail
             out and let the loop get on another lap. We may have a
             minimum runlength though and then we have to fulfill
             the request to write out these atoms! */
          rle_index_dep=0;
          if (new_runlength<3)
            rle_index_dep=IS_LARGE;
          else if (new_runlength<6)
            rle_index_dep=QUITE_LARGE;
          if ((min_runlength)
              || ((new_small_index<small_index+IS_LARGE) && (new_small_index+rle_index_dep<max_large_index))
#if 1
              || (new_small_index+IS_LARGE<max_large_index)
#endif
)
            {
              int nbits;
              if ((new_runlength!=runlength) || (new_small_index!=small_index))
                {
                  int change=new_small_index-small_index;

                  if (new_small_index<=0)
                    change=0;

                  if (change<0)
                    {
                      int ixx;
                      for (ixx=0; ixx<new_runlength; ixx++)
                        {
                          int rejected;
                          do {
                            int ixyz;
                            double isum=0.; /* ints can be almost 32 bit so multiplication will overflow. So do doubles. */
                            for (ixyz=0; ixyz<3; ixyz++)
                              {
                                /* encode_ints is already positive (and multiplied by 2 versus the original, just as magic ints) */
                                double id=encode_ints[ixx*3+ixyz];
                                isum+=id*id;
                              }
                            rejected=0;
#ifdef SHOWIT
                            fprintf(stderr,"Tested decrease %d of index: %g>=%g?\n",change,isum,(double)magic[small_index+change]*(double)magic[small_index+change]);
#endif
                            if (isum>(double)magic[small_index+change]*(double)magic[small_index+change])
                              {
#ifdef SHOWIT
                                fprintf(stderr,"Rejected decrease %d of index due to length of vector: %g>=%g\n",change,isum,(double)magic[small_index+change]*(double)magic[small_index+change]);
#endif
                                rejected=1;
                                change++;
                              }
                          } while ((change<0) && (rejected));
                          if (change==0)
                            break;
                        }
                    }

                  /* If the only thing to do is to change the base by
                     only one -1 it is probably not worth it. */
                  if (!((change==-1) && (runlength==new_runlength)))
                    {
                      /* If we have a very short runlength we do not
                         want to do large base changes.  It costs 6
                         extra bits to do -2. We gain 2/3
                         bits per value to decrease the index by -2,
                         ie 2 bits, so to any changes down we must
                         have a runlength of 3 or more to do it for
                         one molecule! If we have several molecules we
                         will gain of course, so not be so strict. */
                      if ((change==-2) && (new_runlength<3))
                        {
                          if (runlength==new_runlength)
                            change=0;
                          else
                            change=-1;
#ifdef SHOWIT
                          fprintf(stderr,"Rejected change by -2 due to too short runlenght. Change set to %d\n",change);
#endif
                        }

                      /* First adjust base using large base change instruction (if necessary) */
                      while ((change>1) || (change<-1) || ((new_runlength==6) && (change)))
                        {
                          unsigned int code=0U;
                          int this_change=change;
                          if (this_change>2)
                            this_change=2;
                          if (this_change<-2)
                            this_change=-2;
                          change-=this_change;
#ifdef SHOWIT
                          fprintf(stderr,"Large base change: %d.\n",this_change);
#endif
                          small_index+=this_change;
                          if (this_change<0)
                            {
                              code|=2U;
                              this_change=-this_change;
                            }
                          code|=(unsigned int)(this_change-1);
                          write_instruction(coder,INSTR_LARGE_BASE_CHANGE,&output_ptr);
                          Ptngc_writebits(coder,code,2,&output_ptr);
                        }
                      /* If there is still base change or runlength changes to do, we do them now. */
                      if ((new_runlength!=runlength) || (change))
                        {
                          unsigned int ichange=(unsigned int)(change+1);
                          unsigned int code=0U;
                          unsigned int irun=(unsigned int)((new_runlength-1)*3);
                          if (new_runlength==6)
                            ichange=0; /* Means no change. The change has been taken care of explicitly by a large
                                          base change instruction above. */
                          code=ichange+irun;
#ifdef SHOWIT
                          fprintf(stderr,"Small base change: %d Runlength change: %d\n",change,new_runlength);
#endif
                          small_index+=change;
                          write_instruction(coder,INSTR_BASE_RUNLENGTH,&output_ptr);
                          Ptngc_writebits(coder,code,4,&output_ptr);
                          runlength=new_runlength;
                        }
                    }
#ifdef SHOWIT
                  else
                    fprintf(stderr,"Rejected base change due to only change==-1\n");
#endif
#ifdef SHOWIT
                  fprintf(stderr,"Current small index: %d Base=%d\n",small_index,magic[small_index]);
#endif
                }
              /* If we have a large previous integer we can combine it with a sequence of small ints. */
              if (has_large)
                {
                  /* If swapatoms is set to 1 but we did actually not
                     do any swapping, we must first write out the
                     large atom and then the small. If swapatoms is 1
                     and we did swapping we can use the efficient
                     encoding. */
                  if ((swapatoms) && (!didswap))
                    {
#ifdef SHOWIT
                      fprintf(stderr,"Swapatoms was set to 1 but we did not do swapping!\n");
                      fprintf(stderr,"Only one large integer.\n");
#endif
                      /* Flush all large atoms. */
                      flush_large(coder,&has_large,has_large_ints,has_large,large_index,large_nbits,compress_buffer,&output_ptr);
#ifdef SHOWIT
                      fprintf(stderr,"Sequence of only small integers.\n");
#endif
                      write_instruction(coder,INSTR_ONLY_SMALL,&output_ptr);
                    }
                  else
                    {

#ifdef SHOWIT
                      fprintf(stderr,"Sequence of one large and small integers (good compression).\n");
#endif
                      /* Flush all large atoms but one! */
                      if (has_large>1)
                        flush_large(coder,&has_large,has_large_ints,has_large-1,large_index,large_nbits,compress_buffer,&output_ptr);
                      write_instruction(coder,INSTR_DEFAULT,&output_ptr);
                      write_three_large(coder,has_large_ints,large_index,large_nbits,compress_buffer,&output_ptr);
                      has_large=0;
                    }
                }
              else
                {
#ifdef SHOWIT
                  fprintf(stderr,"Sequence of only small integers.\n");
#endif
                  write_instruction(coder,INSTR_ONLY_SMALL,&output_ptr);
                }
              /* Base compress small integers using the current parameters. */
              nbits=magic_bits[small_index][runlength-1];
              /* The same base is used for the small changes. */
              small_idx[0]=small_index;
              small_idx[1]=small_index;
              small_idx[2]=small_index;
              trajcoder_base_compress(encode_ints,runlength*3,small_idx,compress_buffer);
#ifdef SHOWIT
              fprintf(stderr,"nbits=%d (%g)\n",nbits,nbits/(runlength*3.));
              nbits_sum+=nbits;
              nvalues_sum+=runlength*3;
              fprintf(stderr,"Runlength encoded small integers. runlength=%d\n",runlength);
#endif
              /* write out base compressed small integers */
              Ptngc_writemanybits(coder,compress_buffer,nbits,&output_ptr);
#ifdef SHOWIT
              for (ienc=0; ienc<runlength; ienc++)
                fprintf(stderr,"Small: %d %d %d\n",
                        encode_ints[ienc*3],
                        encode_ints[ienc*3+1],
                        encode_ints[ienc*3+2]);
#endif
              /* Update prevcoord. */
              for (ienc=0; ienc<runlength; ienc++)
                {
#ifdef SHOWIT
                  fprintf(stderr,"Prevcoord in packing: %d %d %d\n",
                          prevcoord[0],prevcoord[1],prevcoord[2]);
#endif
                  prevcoord[0]+=unpositive_int(encode_ints[ienc*3]);
                  prevcoord[1]+=unpositive_int(encode_ints[ienc*3+1]);
                  prevcoord[2]+=unpositive_int(encode_ints[ienc*3+2]);
                }
#ifdef SHOWIT
              fprintf(stderr,"Prevcoord in packing: %d %d %d\n",
                      prevcoord[0],prevcoord[1],prevcoord[2]);
#endif

              input_ptr+=3*runlength;
              ntriplets_left-=runlength;
            }
          else
            {
#ifdef SHOWIT
              fprintf(stderr,"Refused value: %d old is %d max is %d\n",new_small_index,small_index,max_large_index);
              fflush(stderr);
#endif
              refused=1;
            }
        }
#ifdef SHOWIT
      fprintf(stderr,"Number of triplets left is %d\n",ntriplets_left);
#endif
    }
  /* If we have large previous integers we must flush them now. */
  if (has_large)
    flush_large(coder,&has_large,has_large_ints,has_large,large_index,large_nbits,compress_buffer,&output_ptr);
  Ptngc_pack_flush(coder,&output_ptr);
  output_length=(int)(output_ptr-output);
#ifdef SHOWIT
  fprintf(stderr,"Done block: nbits=%d nvalues=%d (%g)\n",nbits_sum,nvalues_sum,(double)nbits_sum/nvalues_sum);
#endif
  *length=output_length;
  return output;
}


int Ptngc_unpack_array_xtc2(struct coder *coder, unsigned char *packed, int *output, const int length)
{
  unsigned char *ptr=packed;
  int bitptr=0;
  int minint[3];
  int large_index[3];
  int small_index;
  int prevcoord[3];
  int ntriplets_left=length/3;
  int swapatoms=0;
  int runlength=0;
  int large_nbits;
  unsigned char compress_buffer[18*4]; /* Holds compressed result for 3 large ints or up to 18 small ints. */
  int encode_ints[21]; /* Up to 3 large + 18 small ints can be encoded at once */
  (void)coder;

  /* Read min integers. */
  minint[0]=unpositive_int(readbits(&ptr,&bitptr,32));
  minint[1]=unpositive_int(readbits(&ptr,&bitptr,32));
  minint[2]=unpositive_int(readbits(&ptr,&bitptr,32));
  /* Read large indices */
  large_index[0]=readbits(&ptr,&bitptr,8);
  large_index[1]=readbits(&ptr,&bitptr,8);
  large_index[2]=readbits(&ptr,&bitptr,8);
  /* Read small index */
  small_index=readbits(&ptr,&bitptr,8);

  large_nbits=compute_magic_bits(large_index);

#ifdef SHOWIT
  fprintf(stderr,"Minimum integers: %d %d %d\n",minint[0],minint[1],minint[2]);
  fprintf(stderr,"Large indices: %d %d %d\n",large_index[0],large_index[1],large_index[2]);
  fprintf(stderr,"Small index: %d\n",small_index);
  fprintf(stderr,"large_nbits=%d\n",large_nbits);
#endif

  /* Initial prevcoord is the minimum integers. */
  memcpy(prevcoord, minint, 3*sizeof *prevcoord);

  while (ntriplets_left)
    {
      int instr=read_instruction(&ptr,&bitptr);
#ifdef SHOWIT
      if ((instr>=0) && (instr<MAXINSTR))
        fprintf(stderr,"Decoded instruction %s\n",instrnames[instr]);
#endif
      if ((instr==INSTR_DEFAULT) /* large+small */
          || (instr==INSTR_ONLY_LARGE) /* only large */
          || (instr==INSTR_ONLY_SMALL)) /* only small */
        {
          int large_ints[3]={0,0,0};
          if (instr!=INSTR_ONLY_SMALL)
            {
              /* Clear the compress buffer. */
              int i;
              for (i=0; i<18*4; i++)
                compress_buffer[i]=0;
              /* Get the large value. */
              readmanybits(&ptr,&bitptr,large_nbits,compress_buffer);
              trajcoder_base_decompress(compress_buffer,3,large_index,encode_ints);
              memcpy(large_ints, encode_ints, 3*sizeof *large_ints);
#ifdef SHOWIT
              fprintf(stderr,"large ints: %d %d %d\n",large_ints[0],large_ints[1],large_ints[2]);
#endif
            }
          if (instr!=INSTR_ONLY_LARGE)
            {
              int small_idx[3];
              int i;
              /* The same base is used for the small changes. */
              small_idx[0]=small_index;
              small_idx[1]=small_index;
              small_idx[2]=small_index;
              /* Clear the compress buffer. */
              for (i=0; i<18*4; i++)
                compress_buffer[i]=0;
              /* Get the small values. */
              readmanybits(&ptr,&bitptr,magic_bits[small_index][runlength-1],compress_buffer);
              trajcoder_base_decompress(compress_buffer,3*runlength,small_idx,encode_ints);
#ifdef SHOWIT
              for (i=0; i<runlength; i++)
                fprintf(stderr,"small ints: %d %d %d\n",encode_ints[i*3+0],encode_ints[i*3+1],encode_ints[i*3+2]);
#endif
            }
          if (instr==INSTR_DEFAULT)
            {
              /* Check for swapped atoms */
              if (swapatoms)
                {
                  /* Unswap the atoms. */
                  int i;
                  for (i=0; i<3; i++)
                    {
                      int in[3], out[3];
                      in[0]=large_ints[i];
                      in[1]=unpositive_int(encode_ints[i]);
                      in[2]=unpositive_int(encode_ints[3+i]);
                      swap_ints(in,out);
                      large_ints[i]=out[0];
                      encode_ints[i]=positive_int(out[1]);
                      encode_ints[3+i]=positive_int(out[2]);
                    }
                }
            }
          /* Output result. */
          if (instr!=INSTR_ONLY_SMALL)
            {
              /* Output large value */
              *output++=large_ints[0]+minint[0];
              *output++=large_ints[1]+minint[1];
              *output++=large_ints[2]+minint[2];
              prevcoord[0]=large_ints[0];
              prevcoord[1]=large_ints[1];
              prevcoord[2]=large_ints[2];
#ifdef SHOWIT
              fprintf(stderr,"Prevcoord after unpacking of large: %d %d %d\n",
                      prevcoord[0],prevcoord[1],prevcoord[2]);
              fprintf(stderr,"VALUE:%d %6d %6d %6d\n",
                      length/3-ntriplets_left,
                      prevcoord[0]+minint[0],
                      prevcoord[1]+minint[1],
                      prevcoord[2]+minint[2]);
#endif
              ntriplets_left--;
            }
          if (instr!=INSTR_ONLY_LARGE)
            {
              /* Output small values */
              int i;
#ifdef SHOWIT
              fprintf(stderr,"Prevcoord before unpacking of small: %d %d %d\n",
                          prevcoord[0],prevcoord[1],prevcoord[2]);
#endif
              for (i=0; i<runlength; i++)
                {
                  int v[3];
                  v[0]=unpositive_int(encode_ints[i*3]);
                  v[1]=unpositive_int(encode_ints[i*3+1]);
                  v[2]=unpositive_int(encode_ints[i*3+2]);
                  prevcoord[0]+=v[0];
                  prevcoord[1]+=v[1];
                  prevcoord[2]+=v[2];
#ifdef SHOWIT
                  fprintf(stderr,"Prevcoord after unpacking of small: %d %d %d\n",
                          prevcoord[0],prevcoord[1],prevcoord[2]);
                  fprintf(stderr,"Unpacked small values: %6d %6d %6d\t\t%6d %6d %6d\n",v[0],v[1],v[2],prevcoord[0],prevcoord[1],prevcoord[2]);
                  fprintf(stderr,"VALUE:%d %6d %6d %6d\n",
                          length/3-(ntriplets_left-i),
                          prevcoord[0]+minint[0],
                          prevcoord[1]+minint[1],
                          prevcoord[2]+minint[2]);
#endif
                  *output++=prevcoord[0]+minint[0];
                  *output++=prevcoord[1]+minint[1];
                  *output++=prevcoord[2]+minint[2];
                }
              ntriplets_left-=runlength;
            }
        }
      else if (instr==INSTR_LARGE_RLE)
        {
          int i,j;
          int large_ints[3];
          /* How many large atoms in this sequence? */
          int n=(int)readbits(&ptr,&bitptr,4)+3; /* 3-18 large atoms */
          for (i=0; i<n; i++)
            {
              /* Clear the compress buffer. */
              for (j=0; j<18*4; j++)
                compress_buffer[j]=0;
              /* Get the large value. */
              readmanybits(&ptr,&bitptr,large_nbits,compress_buffer);
              trajcoder_base_decompress(compress_buffer,3,large_index,encode_ints);
              memcpy(large_ints, encode_ints, 3*sizeof *large_ints);
              /* Output large value */
              *output++=large_ints[0]+minint[0];
              *output++=large_ints[1]+minint[1];
              *output++=large_ints[2]+minint[2];
              memcpy(prevcoord, large_ints, 3*sizeof *prevcoord);
            }
          ntriplets_left-=n;
        }
      else if (instr==INSTR_BASE_RUNLENGTH)
        {
          unsigned int code=readbits(&ptr,&bitptr,4);
          int change;
          if (code==15)
            {
              change=0;
              runlength=6;
            }
          else
            {
              int ichange=code%3;
              runlength=code/3+1;
              change=ichange-1;
            }
          small_index+=change;
        }
      else if (instr==INSTR_FLIP)
        {
          swapatoms=1-swapatoms;
        }
      else if (instr==INSTR_LARGE_BASE_CHANGE)
        {
          unsigned int ichange=readbits(&ptr,&bitptr,2);
          int change=(int)(ichange&0x1U)+1;
          if (ichange&0x2U)
            change=-change;
          small_index+=change;
        }
      else
        {
          fprintf(stderr,"TRAJNG: BUG! Encoded unknown instruction.\n");
          exit(EXIT_FAILURE);
        }
#ifdef SHOWIT
      fprintf(stderr,"Number of triplets left is %d\n",ntriplets_left);
#endif
    }
  return 0;
}
